gitextract_u554eb9v/

├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.yml
│   │   ├── config.yml
│   │   ├── documentation.yml
│   │   └── feature-request.yml
│   ├── pull_request_template.md
│   ├── stale_issue_mark_close_workflow.yml
│   └── workflows/
│       ├── acknowledge-new-issue.yml
│       └── auto-label-issues.yml
├── .gitignore
├── .readthedocs.yml
├── CODEOWNERS
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE-DOCUMENTATION
├── LICENSE-SAMPLECODE
├── LICENSE-SUMMARY-DOCS-SAMPLES
├── Makefile
├── README.md
├── _backup-setup/
│   └── neuron-setup/
│       ├── multiframework/
│       │   ├── multi-framework-ubuntu22-neuron-dlami.rst
│       │   └── multi-framework-ubuntu24-neuron-dlami.rst
│       └── pytorch/
│           ├── neuron/
│           │   ├── amazon-linux/
│           │   │   ├── torch-neuron-al2-base-dlami.rst
│           │   │   ├── torch-neuron-al2-pytorch-dlami.rst
│           │   │   ├── torch-neuron-al2.rst
│           │   │   └── torch-neuron-al2023.rst
│           │   └── ubuntu/
│           │       ├── torch-neuron-ubuntu20-base-dlami.rst
│           │       ├── torch-neuron-ubuntu20-pytorch-dlami.rst
│           │       ├── torch-neuron-ubuntu20.rst
│           │       └── torch-neuron-ubuntu22.rst
│           └── neuronx/
│               ├── amazon-linux/
│               │   ├── torch-neuronx-al2-base-dlami.rst
│               │   ├── torch-neuronx-al2-pytorch-dlami.rst
│               │   ├── torch-neuronx-al2.rst
│               │   └── torch-neuronx-al2023.rst
│               └── ubuntu/
│                   ├── torch-neuronx-ubuntu20-base-dlami.rst
│                   ├── torch-neuronx-ubuntu20-pytorch-dlami.rst
│                   ├── torch-neuronx-ubuntu20.rst
│                   ├── torch-neuronx-ubuntu22.rst
│                   └── torch-neuronx-ubuntu24.rst
├── _content-types/
│   ├── conceptual-deep-dive.rst
│   ├── model-card.rst
│   ├── procedural-how-to.rst
│   ├── procedural-tutorial.ipynb
│   ├── reference-kernel-api.rst
│   └── release-notes-templates/
│       ├── compiler.rst
│       ├── containers.rst
│       ├── dlami.rst
│       ├── index.rst
│       ├── nki.rst
│       ├── nx-jax.rst
│       ├── nx-pytorch.rst
│       ├── nxd-core.rst
│       ├── nxd-inference.rst
│       ├── nxd-training.rst
│       ├── runtime.rst
│       └── tools.rst
├── _ext/
│   ├── archive.py
│   ├── df_tables.py
│   ├── local_documenter.py
│   ├── neuron_tag.py
│   ├── release-notes-automation-spec.md
│   ├── release-notes-context.md
│   ├── sphinx_plotly_directive.py
│   └── symlink.py
├── _static/
│   └── css/
│       ├── custom.css
│       └── custom.css.new
├── _templates/
│   ├── recentposts.html
│   ├── search-field.html
│   ├── search-google.html
│   └── search.html
├── _utilities/
│   ├── JIRA_SETUP_QUICKSTART.md
│   ├── add_meta.py
│   ├── audit_frameworks.py
│   ├── check_urls.sh
│   ├── create_sitemap.py
│   ├── format_build_logs.py
│   ├── inject_archive_meta.py
│   ├── metadata_schema.yaml
│   ├── migrate_setup_content.py
│   ├── old-nki-apis.txt
│   └── setup_jira_token.sh
├── about-neuron/
│   ├── amazonq-getstarted.rst
│   ├── announcements/
│   │   ├── index.rst
│   │   ├── neuron1.x/
│   │   │   ├── announce-eol-mx-before-1-5.rst
│   │   │   ├── announce-eol-pt-1-5.rst
│   │   │   ├── announce-eol-pt-before-1-8.rst
│   │   │   ├── announce-eol-tf-before-2-5.rst
│   │   │   ├── announce-eol-tf-before-2-7.rst
│   │   │   ├── announcements.rst
│   │   │   ├── eol-ncgs-env_2.rst
│   │   │   ├── eol-pt-15.rst
│   │   │   └── eol-tf-21-24.rst
│   │   └── neuron2.x/
│   │       ├── announce-component-change.rst
│   │       ├── announce-correction-neuron-driver-support-inf1.rst
│   │       ├── announce-deprecation-containers-rtd.rst
│   │       ├── announce-deprecation-nxd-path-trace-api.rst
│   │       ├── announce-deprecation-transformer-flag.rst
│   │       ├── announce-eol-megatron-lm.rst
│   │       ├── announce-eol-python-3-7.rst
│   │       ├── announce-eol-ubuntu-18.rst
│   │       ├── announce-eos-al2.rst
│   │       ├── announce-eos-beta-pytorch-neuroncore-placement-apis.rst
│   │       ├── announce-eos-bf16-vars.rst
│   │       ├── announce-eos-block-dimension-nki.rst
│   │       ├── announce-eos-dlami-ubuntu-22-04.rst
│   │       ├── announce-eos-dlami.rst
│   │       ├── announce-eos-inf1-virtual-environments.rst
│   │       ├── announce-eos-jax-neuronx-nki-call.rst
│   │       ├── announce-eos-megatronlm-2-13.rst
│   │       ├── announce-eos-mllama-checkpoint.rst
│   │       ├── announce-eos-multiframework-dlamis-inf1.rst
│   │       ├── announce-eos-nemo.rst
│   │       ├── announce-eos-neuron-det.rst
│   │       ├── announce-eos-neuron-driver-support-inf1.rst
│   │       ├── announce-eos-neuron-profiler-2.rst
│   │       ├── announce-eos-neuron-profiler-v230.rst
│   │       ├── announce-eos-neuron-profiler.rst
│   │       ├── announce-eos-neurondevice-version.rst
│   │       ├── announce-eos-neurondevice.rst
│   │       ├── announce-eos-nxd-examples.rst
│   │       ├── announce-eos-nxdt-nxd-core-training.rst
│   │       ├── announce-eos-probuf.rst
│   │       ├── announce-eos-pt-versions.rst
│   │       ├── announce-eos-pt2.rst
│   │       ├── announce-eos-python38.rst
│   │       ├── announce-eos-pytorch-1-1-3.rst
│   │       ├── announce-eos-pytorch-1-9.rst
│   │       ├── announce-eos-pytorch-2-1.rst
│   │       ├── announce-eos-pytorch-2-7-2-8-v229.rst
│   │       ├── announce-eos-pytorch-2-7-2-8.rst
│   │       ├── announce-eos-pytorch-profiling-api.rst
│   │       ├── announce-eos-tensorboard-tools.rst
│   │       ├── announce-eos-tensorflow-2-8-9.rst
│   │       ├── announce-eos-tensorflow-inf2.rst
│   │       ├── announce-eos-tensorflow1-x.rst
│   │       ├── announce-eos-torch-neuron.rst
│   │       ├── announce-eos-torch-neuronx-nki-jit.rst
│   │       ├── announce-eos-u20-dlamis.rst
│   │       ├── announce-eos-xla-bf16.rst
│   │       ├── announce-intent-eol-nemo-arg.rst
│   │       ├── announce-intent-eos-opt.rst
│   │       ├── announce-intent-eos-pt-version.rst
│   │       ├── announce-intent-eos-pt2-6.rst
│   │       ├── announce-intent-eos-tensorflow-tutorial-inf.rst
│   │       ├── announce-intent-eos-tnx.rst
│   │       ├── announce-intent-maintenance-tnx.rst
│   │       ├── announce-maintenance-mxnet.rst
│   │       ├── announce-maintenance-nxdi-nxd-core-inference.rst
│   │       ├── announce-maintenance-nxdt-nxd-core-training.rst
│   │       ├── announce-maintenance-tf.rst
│   │       ├── announce-moving-samples.rst
│   │       ├── announce-nki-library-namespace-changes-2-28.rst
│   │       ├── announce-nki-namespace-migration.rst
│   │       ├── announce-no-longer-support-neuron-det.rst
│   │       ├── announce-no-longer-support-nxd-examples.rst
│   │       ├── announce-no-longer-support-pytorch-113.rst
│   │       ├── announce-no-longer-support-pytorch-2-1.rst
│   │       ├── announce-no-longer-support-pytorch-2-7-2-8.rst
│   │       ├── announce-no-longer-support-tensorflow-inf2.rst
│   │       ├── announce-no-longer-support-u20-dlc-dlami.rst
│   │       ├── announce-no-support-al2.rst
│   │       ├── announce-no-support-device-version.rst
│   │       ├── announce-no-support-jax-neuronx-nki-call.rst
│   │       ├── announce-no-support-llama3-2-checkpoint.rst
│   │       ├── announce-no-support-nemo-megatron.rst
│   │       ├── announce-no-support-neurondevice.rst
│   │       ├── announce-no-support-nki-jit-torch.rst
│   │       ├── announce-no-support-tensorboard-plugin.rst
│   │       ├── announce-no-support-tensorflow1-x.rst
│   │       ├── announce-no-support-tensorflow2-10.rst
│   │       ├── announce-no-support-tf-versions.rst
│   │       ├── announce-no-support-torch-neuron-versions.rst
│   │       ├── announce-no-support-ubuntu-20-base.rst
│   │       ├── announce-no-support-vllm-v0.rst
│   │       ├── announce-nxdi-changes.rst
│   │       ├── announce-package-change.rst
│   │       ├── announce-python38-no-longer-support.rst
│   │       ├── announce-transition-pytorch-trainium.rst
│   │       ├── announcement-end-of-support-neuronxcc-nki.rst
│   │       ├── announcement-end-of-support-nxdt-nxd-core.rst
│   │       ├── announcement-end-of-support-parallel-model-trace.rst
│   │       ├── announcement-end-of-support-pytorch-2-6.rst
│   │       ├── announcement-end-of-support-vllm-v0.rst
│   │       ├── announcement-nki-library-kernel-migration.rst
│   │       ├── announcement-nki-library-namespace-changes.rst
│   │       ├── announcement-python-3-9-eol.rst
│   │       ├── dlami-neuron-2.10.rst
│   │       ├── dlami-neuron-2.12.rst
│   │       ├── dlami-pytorch-introduce.rst
│   │       ├── end-of-support-pt2.rst
│   │       ├── github-changes.rst
│   │       ├── gpg-expiration.rst
│   │       ├── neuron-rtd-eol.rst
│   │       ├── neuron2-intro.rst
│   │       ├── neuron230-packages-changes.rst
│   │       ├── neuron250-packages-changes.rst
│   │       ├── release-neuron2.4.rst
│   │       ├── sm-training-dlc-2.9.1.rst
│   │       └── sm-training-trn1-introduce.rst
│   ├── appnotes/
│   │   ├── index.rst
│   │   ├── mxnet-neuron/
│   │   │   └── flex-eg.rst
│   │   ├── neuron-cc/
│   │   │   └── mixed-precision.rst
│   │   ├── neuron1x/
│   │   │   ├── important-neuronx-dkms.txt
│   │   │   └── introducing-libnrt.rst
│   │   ├── neuronx-cc/
│   │   │   └── neuronx-cc-training-mixed-precision.rst
│   │   ├── neuronx-distributed/
│   │   │   ├── introducing-nxd-inference.rst
│   │   │   └── introducing-nxdt-training.rst
│   │   ├── perf/
│   │   │   └── neuron-cc/
│   │   │       ├── parallel-ncgs.rst
│   │   │       └── performance-tuning.rst
│   │   ├── torch-neuron/
│   │   │   ├── bucketing-app-note.rst
│   │   │   ├── index.rst
│   │   │   ├── rcnn-app-note.rst
│   │   │   └── torch-neuron-dataparallel-app-note.rst
│   │   ├── torch-neuronx/
│   │   │   ├── index.rst
│   │   │   ├── introducing-pytorch-2-6.rst
│   │   │   ├── introducing-pytorch-2-7.rst
│   │   │   ├── introducing-pytorch-2-8.rst
│   │   │   ├── introducing-pytorch-2-9.rst
│   │   │   ├── introducing-pytorch-2-x.rst
│   │   │   ├── migration-from-xla-downcast-bf16.rst
│   │   │   ├── torch-neuronx-dataparallel-app-note.rst
│   │   │   └── torch-neuronx-graph-partitioner-app-note.rst
│   │   └── transformers-neuronx/
│   │       └── generative-llm-inference-with-neuron.rst
│   ├── arch/
│   │   ├── glossary.rst
│   │   ├── index.rst
│   │   ├── neuron-features/
│   │   │   ├── custom-c++-operators.rst
│   │   │   ├── data-types.rst
│   │   │   ├── index.rst
│   │   │   ├── logical-neuroncore-config.rst
│   │   │   ├── neuron-caching.rst
│   │   │   ├── neuroncore-batching.rst
│   │   │   ├── neuroncore-pipeline.rst
│   │   │   └── rounding-modes.rst
│   │   └── neuron-hardware/
│   │       ├── inf1-arch.rst
│   │       ├── inf2-arch.rst
│   │       ├── inferentia.rst
│   │       ├── inferentia2.rst
│   │       ├── neuron-core-v1.rst
│   │       ├── neuron-core-v2.rst
│   │       ├── neuron-core-v3.rst
│   │       ├── neuron-core-v4.rst
│   │       ├── trainium.rst
│   │       ├── trainium2.rst
│   │       ├── trainium3.rst
│   │       ├── trn1-arch.rst
│   │       ├── trn2-arch.rst
│   │       └── trn3-arch.rst
│   ├── benchmarks/
│   │   ├── index.rst
│   │   ├── inf1/
│   │   │   ├── data.csv
│   │   │   ├── index.rst
│   │   │   ├── instance_prices.csv
│   │   │   ├── latency_data_encoder.csv
│   │   │   ├── throughput_data_cnn.csv
│   │   │   └── throughput_data_encoder.csv
│   │   ├── inf2/
│   │   │   ├── inf2-performance.rst
│   │   │   ├── inf2_instance_prices.csv
│   │   │   ├── latency_data_decoder.csv
│   │   │   ├── latency_data_encoder.csv
│   │   │   ├── latency_data_encoder_decoder.csv
│   │   │   ├── latency_data_vision.csv
│   │   │   ├── latency_data_vision_cnn.csv
│   │   │   ├── latency_data_vision_dit.csv
│   │   │   ├── latency_data_vision_sd.csv
│   │   │   ├── latency_data_vision_transformers.csv
│   │   │   ├── throughput_data_decoder.csv
│   │   │   ├── throughput_data_encoder.csv
│   │   │   ├── throughput_data_encoder_decoder.csv
│   │   │   ├── throughput_data_vision.csv
│   │   │   ├── throughput_data_vision_cnn.csv
│   │   │   ├── throughput_data_vision_dit.csv
│   │   │   ├── throughput_data_vision_sd.csv
│   │   │   └── throughput_data_vision_transformers.csv
│   │   └── trn1/
│   │       ├── latency_data_decoder.csv
│   │       ├── latency_data_encoder.csv
│   │       ├── latency_data_encoder_decoder.csv
│   │       ├── throughput_data_decoder.csv
│   │       ├── throughput_data_encoder.csv
│   │       ├── throughput_data_encoder_decoder.csv
│   │       ├── training_data_decoder.csv
│   │       ├── training_data_encoder.csv
│   │       ├── training_data_vision_transformers.csv
│   │       ├── trn1-inference-performance.rst
│   │       ├── trn1-training-performance.rst
│   │       ├── trn1_instance_prices.csv
│   │       └── trn1_trn1n_nlp_data.csv
│   ├── beta-participation.rst
│   ├── calculator/
│   │   └── neuron-calculator.rst
│   ├── faq/
│   │   ├── contributing-faq.rst
│   │   ├── index.rst
│   │   ├── inference/
│   │   │   ├── neuron-faq.rst
│   │   │   └── trouble-shooting-faq.rst
│   │   ├── neuron2-intro-faq.rst
│   │   ├── onnx-faq.rst
│   │   ├── roadmap-faq.rst
│   │   └── training/
│   │       └── neuron-training.rst
│   ├── faq.rst
│   ├── index.rst
│   ├── models/
│   │   ├── index.rst
│   │   ├── inference-inf1-samples.rst
│   │   ├── inference-inf2-trn1-samples.rst
│   │   └── training-trn1-samples.rst
│   ├── monitoring-tools.rst
│   ├── news-and-blogs/
│   │   ├── CONTRIBUTING.md
│   │   ├── JIRA-INTEGRATION-DESIGN.md
│   │   ├── README.md
│   │   ├── article-template.yaml
│   │   ├── index.rst
│   │   ├── news-and-blogs.yaml
│   │   └── validate_articles.py
│   ├── oss/
│   │   └── index.rst
│   ├── profiling-tools.rst
│   ├── quick-start/
│   │   ├── _specs/
│   │   │   └── REFACTORING_NOTES.md
│   │   ├── docs-quicklinks.rst
│   │   ├── github-samples.rst
│   │   ├── index.rst
│   │   ├── inference-quickstart.rst
│   │   ├── mxnet-neuron.rst
│   │   ├── tab-inference-tensorflow-neuron.rst
│   │   ├── tensorflow-neuron.rst
│   │   ├── torch-neuron-tab-training.rst
│   │   ├── torch-neuron.rst
│   │   ├── training-quickstart.rst
│   │   └── user-guide-quickstart.rst
│   ├── sdk-policy.rst
│   ├── security.rst
│   ├── troubleshooting.rst
│   ├── what-is-neuron.rst
│   └── whats-new.rst
├── archive/
│   ├── helper-tools/
│   │   ├── index.rst
│   │   ├── tutorial-neuron-check-model.rst
│   │   └── tutorial-neuron-gatherinfo.rst
│   ├── index.rst
│   ├── mxnet-neuron/
│   │   ├── api-compilation-python-api.rst
│   │   ├── api-reference-guide.rst
│   │   ├── api-reference-guide.txt
│   │   ├── developer-guide.rst
│   │   ├── developer-guide.txt
│   │   ├── ec2-then-ec2-devflow.rst
│   │   ├── index.rst
│   │   ├── inference-mxnet-neuron.rst
│   │   ├── inference-mxnet-neuron.txt
│   │   ├── misc-mxnet-neuron.rst
│   │   ├── misc-mxnet-neuron.txt
│   │   ├── mxnet-neuron-setup.rst
│   │   ├── mxnet-neuron-setup.txt
│   │   ├── neo-then-hosting-devflow.rst
│   │   ├── setup/
│   │   │   ├── mxnet-install-prev-al2.rst
│   │   │   ├── mxnet-install-prev-al2023.rst
│   │   │   ├── mxnet-install-prev-u20.rst
│   │   │   ├── mxnet-install-prev-u22.rst
│   │   │   ├── mxnet-install.rst
│   │   │   ├── mxnet-neuron-al2-base-dlami.rst
│   │   │   ├── mxnet-neuron-al2.rst
│   │   │   ├── mxnet-neuron-al2023.rst
│   │   │   ├── mxnet-neuron-ubuntu20-base-dlami.rst
│   │   │   ├── mxnet-neuron-ubuntu20.rst
│   │   │   ├── mxnet-neuron-ubuntu22.rst
│   │   │   ├── mxnet-update-u20.rst
│   │   │   ├── mxnet-update.rst
│   │   │   ├── prev-releases/
│   │   │   │   ├── neuron-1.14.2-mxnet-install.rst
│   │   │   │   ├── neuron-1.15.0-mxnet-install.rst
│   │   │   │   ├── neuron-1.15.1-mxnet-install.rst
│   │   │   │   ├── neuron-1.15.2-mxnet-install.rst
│   │   │   │   ├── neuron-1.16.3-mxnet-install.rst
│   │   │   │   ├── neuron-1.17.2-mxnet-install.rst
│   │   │   │   ├── neuron-1.18.0-mxnet-install.rst
│   │   │   │   └── neuron-1.19.0-mxnet-install.rst
│   │   │   └── setup-inference
│   │   ├── troubleshooting-guide.rst
│   │   └── tutorials/
│   │       ├── mxnet-tutorial-setup.rst
│   │       ├── tutorial-model-serving.rst
│   │       ├── tutorials-mxnet-computervision.rst
│   │       ├── tutorials-mxnet-neuron.rst
│   │       ├── tutorials-mxnet-neuron.txt
│   │       ├── tutorials-mxnet-nlp.rst
│   │       └── tutorials-mxnet-utilizing-neuron-capabilities.rst
│   ├── neuronperf/
│   │   ├── index.rst
│   │   ├── neuronperf_api.rst
│   │   ├── neuronperf_benchmark_guide.rst
│   │   ├── neuronperf_compile_guide.rst
│   │   ├── neuronperf_evaluate_guide.rst
│   │   ├── neuronperf_examples.rst
│   │   ├── neuronperf_faq.rst
│   │   ├── neuronperf_framework_notes.rst
│   │   ├── neuronperf_install.rst
│   │   ├── neuronperf_model_index_guide.rst
│   │   ├── neuronperf_overview.rst
│   │   ├── neuronperf_terminology.rst
│   │   ├── neuronperf_troubleshooting.rst
│   │   ├── rn.rst
│   │   ├── setup.cfg
│   │   ├── setup.py
│   │   ├── test_resnet50_pt.py
│   │   └── test_simple_pt.py
│   ├── src/
│   │   └── benchmark/
│   │       └── pytorch/
│   │           ├── bert-base-cased_benchmark.py
│   │           ├── bert-base-cased_compile.py
│   │           ├── bert-base-uncased_benchmark.py
│   │           ├── bert-base-uncased_compile.py
│   │           ├── distilbert-base-uncased-finetuned-sst-2-english_benchmark.py
│   │           ├── distilbert-base-uncased-finetuned-sst-2-english_compile.py
│   │           ├── distilbert-base-uncased_benchmark.py
│   │           ├── distilbert-base-uncased_compile.py
│   │           ├── distilroberta-base_benchmark.py
│   │           ├── distilroberta-base_compile.py
│   │           ├── hf-google-vit_benchmark.py
│   │           ├── hf-openai-clip_benchmark.py
│   │           ├── hf_pretrained_wav2vec2_conformer_relpos_benchmark.py
│   │           ├── hf_pretrained_wav2vec2_conformer_rope_benchmark.py
│   │           ├── inf2_benchmark.py
│   │           ├── opt_benchmark.py
│   │           ├── perceiver-multimodal_benchmark.py
│   │           ├── perceiver-multimodal_compile.py
│   │           ├── perceiver-vision_benchmark.py
│   │           ├── perceiver-vision_compile.py
│   │           ├── pixart_alpha_benchmark.py
│   │           ├── pixart_sigma_benchmark.py
│   │           ├── resnet50_benchmark.py
│   │           ├── resnet50_compile.py
│   │           ├── resnet_benchmark.py
│   │           ├── resnet_compile.py
│   │           ├── sd2_512_benchmark.py
│   │           ├── sd2_512_compile.py
│   │           ├── sd2_768_benchmark.py
│   │           ├── sd2_768_compile.py
│   │           ├── sd2_inpainting_benchmark.py
│   │           ├── sd2_inpainting_inference.py
│   │           ├── sd_15_512_benchmark.py
│   │           ├── sd_15_512_compile.py
│   │           ├── sd_4x_upscaler_benchmark.py
│   │           ├── sd_4x_upscaler_compile.py
│   │           ├── sdxl_base_1024_benchmark.py
│   │           ├── sdxl_base_1024_compile.py
│   │           ├── sdxl_base_and_refiner_1024_benchmark.py
│   │           ├── sdxl_base_and_refiner_1024_compile.py
│   │           ├── unet_benchmark.py
│   │           ├── unet_compile.py
│   │           ├── vgg_benchmark.py
│   │           └── vgg_compile.py
│   ├── tensorboard/
│   │   └── getting-started-tensorboard-neuron-plugin.rst
│   ├── tensorflow/
│   │   ├── index.rst
│   │   ├── setup-legacy-inf1-tensorflow.rst
│   │   ├── tensorflow-neuron/
│   │   │   ├── additional-examples.rst
│   │   │   ├── additional-examples.txt
│   │   │   ├── api-auto-replication-api.rst
│   │   │   ├── api-compilation-python-api.rst
│   │   │   ├── api-reference-guide.rst
│   │   │   ├── api-reference-guide.txt
│   │   │   ├── api-tfn-analyze-model-api.rst
│   │   │   ├── api-tracing-python-api.rst
│   │   │   ├── dlc-then-ec2-devflow.rst
│   │   │   ├── dlc-then-ecs-devflow.rst
│   │   │   ├── dlc-then-eks-devflow.rst
│   │   │   ├── ec2-then-ec2-devflow.rst
│   │   │   ├── misc-tensorflow-neuron.rst
│   │   │   ├── misc-tensorflow-neuron.txt
│   │   │   ├── neo-then-hosting-devflow.rst
│   │   │   ├── setup/
│   │   │   │   ├── prev-releases/
│   │   │   │   │   ├── neuron-1.14.2-tensorflow-install.rst
│   │   │   │   │   ├── neuron-1.15.0-tensorflow-install.rst
│   │   │   │   │   ├── neuron-1.15.1-tensorflow-install.rst
│   │   │   │   │   ├── neuron-1.15.2-tensorflow-install.rst
│   │   │   │   │   ├── neuron-1.16.3-tensorflow-install.rst
│   │   │   │   │   ├── neuron-1.17.0-tensorflow-install.rst
│   │   │   │   │   ├── neuron-1.17.1-tensorflow-install.rst
│   │   │   │   │   ├── neuron-1.17.2-tensorflow-install.rst
│   │   │   │   │   ├── neuron-1.18.0-tensorflow-install.rst
│   │   │   │   │   └── neuron-1.19.0-tensorflow-install.rst
│   │   │   │   ├── tensorflow-install-prev-al2023.rst
│   │   │   │   ├── tensorflow-install-prev-u20.rst
│   │   │   │   ├── tensorflow-install-prev-u22.rst
│   │   │   │   ├── tensorflow-install-prev.rst
│   │   │   │   ├── tensorflow-install.rst
│   │   │   │   ├── tensorflow-update-u20.rst
│   │   │   │   ├── tensorflow-update-u22.rst
│   │   │   │   └── tensorflow-update.rst
│   │   │   ├── tensorflow2-accelerated-ops.rst
│   │   │   ├── tf2_faq.rst
│   │   │   └── tutorials/
│   │   │       ├── bert_demo/
│   │   │       │   ├── bert_demo.rst
│   │   │       │   ├── glue_mrpc_dev.tsv
│   │   │       │   └── mrpc.proto
│   │   │       ├── index.rst
│   │   │       ├── k8s_bert_demo/
│   │   │       │   └── Dockerfile.tfserving_example
│   │   │       ├── tensorflow-tutorial-setup.rst
│   │   │       ├── tutorials-tensorflow-neuron.rst
│   │   │       ├── tutorials-tensorflow-neuron.txt
│   │   │       ├── tutorials-tensorflow-nlp.rst
│   │   │       └── tutorials-tensorflow-utilizing-neuron-capabilities.rst
│   │   ├── tensorflow-neuron-inference.rst
│   │   ├── tensorflow-neuron-inference.txt
│   │   ├── tensorflow-neuronx/
│   │   │   ├── api-reference-guide.rst
│   │   │   ├── api-reference-guide.txt
│   │   │   ├── misc-tensorflow-neuronx.rst
│   │   │   ├── misc-tensorflow-neuronx.txt
│   │   │   ├── setup/
│   │   │   │   ├── index.rst
│   │   │   │   ├── prev-releases/
│   │   │   │   │   ├── neuronx-2.8.0-tensorflow-install.rst
│   │   │   │   │   └── neuronx-2.9.0-tensorflow-install.rst
│   │   │   │   ├── tensorflow-install-prev-al2.rst
│   │   │   │   ├── tensorflow-install-prev-al2023.rst
│   │   │   │   ├── tensorflow-install-prev-u20.rst
│   │   │   │   ├── tensorflow-install-prev-u22.rst
│   │   │   │   ├── tensorflow-neuronx-install.rst
│   │   │   │   ├── tensorflow-update-al2-dlami.rst
│   │   │   │   ├── tensorflow-update-al2.rst
│   │   │   │   ├── tensorflow-update-u20-dlami.rst
│   │   │   │   ├── tensorflow-update-u20.rst
│   │   │   │   └── tensorflow-update-u22.rst
│   │   │   ├── tf-neuronx-auto-replication-api.rst
│   │   │   ├── tfneuronx-python-tracing-api.rst
│   │   │   ├── tfnx-analyze-model-api.rst
│   │   │   └── tutorials/
│   │   │       ├── tutorial-tensorflowx-serving-NeuronRT-Visible-Cores.rst
│   │   │       ├── tutorials-tensorflow-neuronx.rst
│   │   │       └── tutorials-tensorflow-neuronx.txt
│   │   ├── tensorflow-neuronx-inference.rst
│   │   ├── tensorflow-neuronx-inference.txt
│   │   ├── tensorflow-setup.rst
│   │   └── tensorflow-setup.txt
│   ├── torch-neuron/
│   │   ├── additional-examples-inference-torch-neuron.rst
│   │   ├── additional-examples-inference-torch-neuron.txt
│   │   ├── api-compilation-python-api.rst
│   │   ├── api-core-placement.rst
│   │   ├── api-reference-guide-torch-neuron.rst
│   │   ├── api-reference-guide-torch-neuron.txt
│   │   ├── api-torch-neuron-dataparallel-api.rst
│   │   ├── developer-guide-torch-neuron.rst
│   │   ├── developer-guide-torch-neuron.txt
│   │   ├── guides/
│   │   │   ├── core-placement/
│   │   │   │   └── torch-core-placement.rst
│   │   │   └── torch-lstm-support.rst
│   │   ├── index.rst
│   │   ├── inference-torch-neuron.rst
│   │   ├── misc-inference-torch-neuron.rst
│   │   ├── misc-inference-torch-neuron.txt
│   │   ├── placement.py
│   │   ├── setup/
│   │   │   ├── index.rst
│   │   │   ├── prev-releases/
│   │   │   │   ├── neuron-1.14.2-pytorch-install.rst
│   │   │   │   ├── neuron-1.15.0-pytorch-install.rst
│   │   │   │   ├── neuron-1.15.1-pytorch-install.rst
│   │   │   │   ├── neuron-1.15.2-pytorch-install.rst
│   │   │   │   ├── neuron-1.16.1-pytorch-install.rst
│   │   │   │   ├── neuron-1.16.2-pytorch-install.rst
│   │   │   │   ├── neuron-1.16.3-pytorch-install.rst
│   │   │   │   ├── neuron-1.17.2-pytorch-install.rst
│   │   │   │   ├── neuron-1.18.0-pytorch-install.rst
│   │   │   │   ├── neuron-1.19.0-pytorch-install.rst
│   │   │   │   ├── neuron-2.3.0-pytorch-install.rst
│   │   │   │   ├── neuron-2.4.0-pytorch-install.rst
│   │   │   │   └── neuron-2.5.0-pytorch-install.rst
│   │   │   ├── pytorch-install-cxx11.rst
│   │   │   ├── pytorch-install-prev-al2.rst
│   │   │   ├── pytorch-install-prev-al2023.rst
│   │   │   ├── pytorch-install-prev-u20.rst
│   │   │   ├── pytorch-install-prev-u22.rst
│   │   │   ├── pytorch-install-prev.rst
│   │   │   ├── pytorch-install.rst
│   │   │   ├── pytorch-update-al2-dlami.rst
│   │   │   ├── pytorch-update-al2023.rst
│   │   │   ├── pytorch-update-u20-dlami.rst
│   │   │   ├── pytorch-update-u20.rst
│   │   │   ├── pytorch-update-u22.rst
│   │   │   └── pytorch-update.rst
│   │   ├── torch-neuron-dataparallel-example-default.rst
│   │   ├── torch-neuron-dataparallel-example-dim-neq-zero.rst
│   │   ├── torch-neuron-dataparallel-example-disable-dynamic-batching.rst
│   │   ├── torch-neuron-dataparallel-example-dynamic-batching.rst
│   │   ├── torch-neuron-dataparallel-example-specify-ncs.rst
│   │   ├── troubleshooting-guide.rst
│   │   └── tutorials/
│   │       ├── neuroncore_pipeline_pytorch.rst
│   │       ├── pytorch-tutorial-setup.rst
│   │       ├── transformers-marianmt.rst
│   │       ├── tutorial-libtorch.rst
│   │       ├── tutorial-torchserve.rst
│   │       ├── tutorial_source_instructions/
│   │       │   ├── run_libtorch.sh
│   │       │   └── run_torchserve_u20.sh
│   │       ├── tutorials-inference-torch-neuron.rst
│   │       ├── tutorials-inference-torch-neuron.txt
│   │       ├── tutorials-torch-neuron-computervision.rst
│   │       ├── tutorials-torch-neuron-nlp.rst
│   │       └── tutorials-utilizing-neuron-capabilities.rst
│   ├── transformers-neuronx/
│   │   ├── api-reference-guide.rst
│   │   ├── api-reference-guide.txt
│   │   ├── developer-guide.rst
│   │   ├── developer-guide.txt
│   │   ├── index.rst
│   │   ├── setup/
│   │   │   └── index.rst
│   │   ├── transformers-neuronx-api-reference.rst
│   │   ├── transformers-neuronx-developer-guide-for-continuous-batching.rst
│   │   ├── transformers-neuronx-developer-guide.rst
│   │   ├── transformers-neuronx-misc.rst
│   │   ├── transformers-neuronx-misc.txt
│   │   ├── transformers-neuronx-tutorials.rst
│   │   ├── transformers-neuronx-tutorials.txt
│   │   └── transformers-neuronx.txt
│   └── tutorials/
│       ├── finetune_t5.rst
│       ├── finetuning_llama2_7b_ptl.rst
│       ├── gpt3_neuronx_nemo_megatron_pretraining.rst
│       ├── megatron_gpt_pretraining.rst
│       ├── multinode-training-model-profiling.rst
│       ├── nxd-source-code/
│       │   ├── gpt_neox_tp_zero1/
│       │   │   ├── gpt_neox_20b.sh
│       │   │   └── gpt_neox_6_9b.sh
│       │   └── llama_tp_pp_ptl/
│       │       ├── llama_2_13b.sh
│       │       ├── llama_2_70b.sh
│       │       ├── llama_2_7b.sh
│       │       └── llama_tp_pp_ptl_setup.sh
│       ├── ssd300_demo/
│       │   ├── requirements.txt
│       │   ├── ssd300_demo.rst
│       │   ├── ssd300_detection.py
│       │   ├── ssd300_evaluation.py
│       │   ├── ssd300_evaluation_client.py
│       │   └── ssd300_model.py
│       ├── training-gpt-neox-20b.rst
│       ├── training-gpt-neox.rst
│       ├── training_codegen25_7b.rst
│       ├── training_llama2_tp_pp_ptl.rst
│       └── tutorial_source_code/
│           └── t5_finetuning/
│               ├── t5_finetuning_32_worker_training_code.sh
│               ├── t5_finetuning_multi_worker_training_code.sh
│               ├── t5_finetuning_setup_code.sh
│               ├── t5_finetuning_single_worker_training_code.sh
│               └── t5_modify_run_summarization_code.sh
├── audit-report.md
├── build.sh
├── compiler/
│   ├── error-codes/
│   │   ├── EARG001.rst
│   │   ├── EBIR023.rst
│   │   ├── EBVF030.rst
│   │   ├── EHCA005.rst
│   │   ├── EOOM001.rst
│   │   ├── EOOM002.rst
│   │   ├── ESFH002.rst
│   │   ├── ESPP004.rst
│   │   ├── ESPP047.rst
│   │   ├── EUOC002.rst
│   │   ├── EVRF001.rst
│   │   ├── EVRF004.rst
│   │   ├── EVRF005.rst
│   │   ├── EVRF006.rst
│   │   ├── EVRF007.rst
│   │   ├── EVRF009.rst
│   │   ├── EVRF010.rst
│   │   ├── EVRF011.rst
│   │   ├── EVRF013.rst
│   │   ├── EVRF015.rst
│   │   ├── EVRF016.rst
│   │   ├── EVRF017.rst
│   │   ├── EVRF018.rst
│   │   ├── EVRF019.rst
│   │   ├── EVRF022.rst
│   │   ├── EVRF031.rst
│   │   ├── EXSP001.rst
│   │   ├── EXTP004.rst
│   │   └── index.rst
│   ├── index.rst
│   ├── neuron-cc/
│   │   ├── api-reference-guide.rst
│   │   ├── command-line-reference.rst
│   │   ├── developer-guide.rst
│   │   └── faq.rst
│   ├── neuron-cc.rst
│   ├── neuronx-cc/
│   │   ├── api-reference-guide/
│   │   │   └── index.rst
│   │   ├── developer-guide.rst
│   │   ├── faq.rst
│   │   └── how-to-convolution-in-unet.rst
│   └── neuronx-cc.rst
├── conf.py
├── containers/
│   ├── container-deployment-flows.rst
│   ├── container-sm-hosting-devflow.rst
│   ├── developerflows.rst
│   ├── developerflows.txt
│   ├── dlc-then-customize-devflow.rst
│   ├── dlc-then-ec2-devflow.rst
│   ├── dlc-then-ecs-devflow.rst
│   ├── dlc-then-eks-devflow.rst
│   ├── dlc-then-k8s-devflow.rst
│   ├── docker-example/
│   │   ├── Dockerfile.device-plugin
│   │   ├── index.rst
│   │   ├── inference/
│   │   │   ├── Dockerfile-inference
│   │   │   ├── Dockerfile-inference-dlc
│   │   │   ├── Dockerfile-inference-dlc.rst
│   │   │   ├── Dockerfile-libmode
│   │   │   ├── Dockerfile-libmode.rst
│   │   │   ├── Dockerfile-tf-serving.rst
│   │   │   ├── Dockerfile.mxnet-serving
│   │   │   ├── Dockerfile.tf-serving
│   │   │   ├── config-properties.rst
│   │   │   ├── config.properties
│   │   │   ├── dockerd-libmode-entrypoint.rst
│   │   │   ├── dockerd-libmode-entrypoint.sh
│   │   │   ├── torchserve-neuron.rst
│   │   │   └── torchserve-neuron.sh
│   │   ├── training/
│   │   │   ├── Dockerfile-training-dlc
│   │   │   ├── Dockerfile-trainium-dlc.rst
│   │   │   ├── mlp.rst
│   │   │   ├── mlp_train.py
│   │   │   └── model.py
│   │   └── v1/
│   │       └── inference/
│   │           ├── Dockerfile-app-rt-diff.rst
│   │           ├── Dockerfile-app-rt-same.rst
│   │           ├── Dockerfile-neuron-rtd.rst
│   │           ├── Dockerfile-torch-neuron.rst
│   │           ├── Dockerfile.app-rt-diff
│   │           ├── Dockerfile.neuron-rtd
│   │           ├── Dockerfile.torch-neuron
│   │           ├── dockerd-entrypoint-app-rt-same.rst
│   │           └── dockerd-entrypoint.sh
│   ├── ec2-then-ec2-devflow.rst
│   ├── ec2.rst
│   ├── faq-troubleshooting-releasenote.rst
│   ├── faq.rst
│   ├── files/
│   │   ├── index-dra.rst
│   │   ├── manifests/
│   │   │   ├── clusterrole.yaml
│   │   │   ├── clusterrolebinding.yaml
│   │   │   ├── daemonset.yaml
│   │   │   ├── deviceclass.yaml
│   │   │   ├── namespace.yaml
│   │   │   └── serviceaccount.yaml
│   │   ├── scripts/
│   │   │   └── install-dra-driver.sh
│   │   └── specs/
│   │       ├── 1x4-connected-devices.yaml
│   │       ├── 2-node-inference-us.yaml
│   │       ├── 4-node-inference-us.yaml
│   │       ├── all-devices.yaml
│   │       ├── lnc-setting-trn2.yaml
│   │       ├── specific-driver-version.yaml
│   │       └── us-and-lnc-config.yaml
│   ├── get-started/
│   │   ├── quickstart-configure-deploy-dlc.rst
│   │   └── quickstart-pytorch-inference-dlc.rst
│   ├── getting-started.rst
│   ├── how-to/
│   │   └── how-to-ultraserver.rst
│   ├── index.rst
│   ├── k8.rst
│   ├── kubernetes-getting-started.rst
│   ├── locate-neuron-dlc-image.rst
│   ├── neo-then-hosting-devflow.rst
│   ├── neuron-dra.rst
│   ├── neuron-plugins.rst
│   ├── neuron_dlc_images.csv
│   ├── troubleshooting.rst
│   ├── tutorial-docker-runtime1.0.rst
│   ├── tutorials/
│   │   ├── build-run-neuron-container.rst
│   │   ├── inference/
│   │   │   ├── index.rst
│   │   │   ├── index.txt
│   │   │   ├── k8s_rn50_demo.rst
│   │   │   └── tutorial-infer.rst
│   │   ├── k8s-default-scheduler.rst
│   │   ├── k8s-multiple-scheduler.rst
│   │   ├── k8s-neuron-device-plugin.rst
│   │   ├── k8s-neuron-helm-chart.rst
│   │   ├── k8s-neuron-monitor.rst
│   │   ├── k8s-neuron-problem-detector-and-recovery-irsa.rst
│   │   ├── k8s-neuron-problem-detector-and-recovery.rst
│   │   ├── k8s-neuron-scheduler-flow.rst
│   │   ├── k8s-neuron-scheduler.rst
│   │   ├── k8s-prerequisite.rst
│   │   ├── k8s-setup.rst
│   │   ├── training/
│   │   │   ├── index.rst
│   │   │   ├── index.txt
│   │   │   ├── k8s_mlp_train_demo.rst
│   │   │   └── tutorial-training.rst
│   │   ├── tutorial-docker-env-setup.rst
│   │   └── tutorial-oci-hook.rst
│   └── tutorials.rst
├── devflows/
│   ├── aws-batch-flows.rst
│   ├── aws-batch-flows.txt
│   ├── dlc-then-customize-devflow.rst
│   ├── ec2-flows.rst
│   ├── ec2-flows.txt
│   ├── ecs-flows.rst
│   ├── eks-flows.rst
│   ├── index.rst
│   ├── inference/
│   │   ├── aws-batch-flows.rst
│   │   ├── aws-batch-flows.txt
│   │   ├── byoc-hosting-devflow-inf2.rst
│   │   ├── byoc-hosting-devflow.rst
│   │   ├── container-sm-hosting-devflow.rst
│   │   ├── dev-flows.rst
│   │   ├── dlc-then-ec2-devflow.rst
│   │   ├── dlc-then-ecs-devflow.rst
│   │   ├── dlc-then-eks-devflow.rst
│   │   ├── dlc-then-k8s-devflow.rst
│   │   ├── ec2-flows.rst
│   │   ├── ec2-flows.txt
│   │   ├── ec2-then-ec2-devflow-inf2.rst
│   │   ├── ec2-then-ec2-devflow.rst
│   │   ├── env-setup-text.rst
│   │   ├── neo-then-hosting-devflow.rst
│   │   ├── parallelcluster-flows.rst
│   │   ├── parallelcluster-flows.txt
│   │   ├── sagemaker-flows.rst
│   │   └── sagemaker-flows.txt
│   ├── parallelcluster-flows.rst
│   ├── parallelcluster-flows.txt
│   ├── plugins/
│   │   ├── npd-ecs-flows.rst
│   │   └── npd-ecs-flows.txt
│   ├── sagemaker-flows.rst
│   ├── setup/
│   │   ├── ecs-flows.rst
│   │   ├── ecs-flows.txt
│   │   ├── eks-flows.rst
│   │   └── eks-flows.txt
│   ├── third-party-solutions.rst
│   └── training/
│       ├── aws-batch-flows.rst
│       ├── aws-batch-flows.txt
│       ├── batch/
│       │   └── batch-training.rst
│       ├── dlc-then-ecs-devflow.rst
│       ├── ec2/
│       │   └── ec2-training.rst
│       ├── ec2-flows.rst
│       ├── ec2-flows.txt
│       ├── parallelcluster/
│       │   └── parallelcluster-training.rst
│       ├── parallelcluster-flows.rst
│       ├── parallelcluster-flows.txt
│       ├── sagemaker-flows.rst
│       ├── sagemaker-flows.txt
│       └── sm-devflow/
│           └── sm-training-devflow.rst
├── dlami/
│   └── index.rst
├── frameworks/
│   ├── index.rst
│   ├── jax/
│   │   ├── api-reference-guide/
│   │   │   ├── index.rst
│   │   │   └── neuron-envvars.rst
│   │   ├── index.rst
│   │   └── setup/
│   │       ├── jax-neuronx-known-issues.rst
│   │       └── jax-setup.rst
│   └── torch/
│       ├── about/
│       │   └── index.rst
│       ├── guide-torch-neuron-vs-torch-neuronx-inference.rst
│       ├── index.rst
│       ├── inference-torch-neuronx.rst
│       ├── pytorch-native-overview.rst
│       ├── torch-neuronx/
│       │   ├── additional-examples-inference-torch-neuronx.rst
│       │   ├── additional-examples-training.rst
│       │   ├── api-reference-guide/
│       │   │   ├── inference/
│       │   │   │   ├── api-torch-neuronx-analyze.rst
│       │   │   │   ├── api-torch-neuronx-async-lazy-load.rst
│       │   │   │   ├── api-torch-neuronx-core-placement.rst
│       │   │   │   ├── api-torch-neuronx-data-parallel.rst
│       │   │   │   ├── api-torch-neuronx-replace-weights.rst
│       │   │   │   ├── api-torch-neuronx-trace.rst
│       │   │   │   └── inference-api-guide-torch-neuronx.rst
│       │   │   ├── torch-neuronx-profiling-api.rst
│       │   │   └── training/
│       │   │       ├── index.rst
│       │   │       ├── pytorch-neuron-parallel-compile.rst
│       │   │       └── torch-neuron-envvars.rst
│       │   ├── misc-inference-torch-neuronx.rst
│       │   ├── misc-training.rst
│       │   ├── programming-guide/
│       │   │   ├── inference/
│       │   │   │   ├── autobucketing-dev-guide.rst
│       │   │   │   ├── core-placement.rst
│       │   │   │   ├── index.rst
│       │   │   │   └── trace-vs-xla-lazytensor.rst
│       │   │   ├── torch-neuronx-profiling-dev-guide.rst
│       │   │   └── training/
│       │   │       ├── index.rst
│       │   │       ├── pytorch-neuron-debug.rst
│       │   │       └── pytorch-neuron-programming-guide.rst
│       │   ├── pytorch-neuron-supported-operators.rst
│       │   ├── setup/
│       │   │   ├── install-templates/
│       │   │   │   └── pytorch-dev-install.txt
│       │   │   ├── note-setup-general.rst
│       │   │   ├── prev-releases/
│       │   │   │   ├── neuronx-2.7.0-pytorch-install.rst
│       │   │   │   ├── neuronx-2.8.0-pytorch-install.rst
│       │   │   │   └── neuronx-2.9.0-pytorch-install.rst
│       │   │   ├── pytorch-install-prev-al2.rst
│       │   │   ├── pytorch-install-prev-al2023.rst
│       │   │   ├── pytorch-install-prev-u20.rst
│       │   │   ├── pytorch-install-prev-u22.rst
│       │   │   ├── pytorch-install-prev-u24.rst
│       │   │   ├── pytorch-install.rst
│       │   │   ├── pytorch-neuronx-install-cxx11.rst
│       │   │   ├── pytorch-update-al2-dlami.rst
│       │   │   ├── pytorch-update-al2.rst
│       │   │   ├── pytorch-update-al2023.rst
│       │   │   ├── pytorch-update-u20-dlami.rst
│       │   │   ├── pytorch-update-u20.rst
│       │   │   ├── pytorch-update-u22.rst
│       │   │   └── pytorch-update-u24.rst
│       │   ├── setup-trn1-multi-node-execution.rst
│       │   ├── torch-neuronx-dataparallel-example-default.rst
│       │   ├── torch-neuronx-dataparallel-example-dim-neq-zero.rst
│       │   ├── torch-neuronx-dataparallel-example-disable-dynamic-batching.rst
│       │   ├── torch-neuronx-dataparallel-example-dynamic-batching.rst
│       │   ├── torch-neuronx-dataparallel-example-specify-ncs.rst
│       │   ├── training-troubleshooting.rst
│       │   └── tutorials/
│       │       ├── inference/
│       │       │   ├── tutorial-torchserve-neuronx.rst
│       │       │   └── tutorials-torch-neuronx.rst
│       │       ├── note-performance.txt
│       │       └── training/
│       │           ├── analyze_for_training.rst
│       │           ├── bert.rst
│       │           ├── finetune_hftrainer.rst
│       │           ├── mlp.rst
│       │           ├── tutorial_source_code/
│       │           │   ├── analyze_training/
│       │           │   │   └── analyze_training_code.sh
│       │           │   ├── bert_mrpc_finetuning/
│       │           │   │   ├── bert_mrpc_finetuning_converted_checkpoint_training.sh
│       │           │   │   ├── bert_mrpc_finetuning_multi_worker_training_code.sh
│       │           │   │   ├── bert_mrpc_finetuning_setup_code.sh
│       │           │   │   └── bert_mrpc_finetuning_single_worker_training.sh
│       │           │   ├── bert_training/
│       │           │   │   ├── bert_amp_training_code.sh
│       │           │   │   ├── bert_lamb_bf16_training_code.sh
│       │           │   │   ├── bert_lamb_training_code.sh
│       │           │   │   ├── bert_phase2_training_code.sh
│       │           │   │   ├── bert_precompilation_code.sh
│       │           │   │   ├── bert_setup_code.sh
│       │           │   │   ├── bert_setup_code_ph2.sh
│       │           │   │   └── bert_training_code.sh
│       │           │   ├── multi_layer_perceptron_training/
│       │           │   │   └── multi_layer_perceptron_training_code.sh
│       │           │   └── zero1_training/
│       │           │       └── zero1_single_node_training_code.sh
│       │           ├── tutorials-training-torch-neuronx.rst
│       │           └── zero1_gpt2.rst
│       ├── torch-setup.rst
│       └── training-torch-neuronx.rst
├── general/
│   └── faq.rst
├── includes/
│   └── setup/
│       ├── select-framework-note.txt
│       ├── tab-inference-mxnet-neuron-al2.txt
│       ├── tab-inference-mxnet-neuron-al2023.txt
│       ├── tab-inference-mxnet-neuron-u20.txt
│       ├── tab-inference-mxnet-neuron-u22.txt
│       ├── tab-inference-mxnet-neuron.txt
│       ├── tab-inference-tensorflow-neuron-al2.txt
│       ├── tab-inference-tensorflow-neuron-al2023.txt
│       ├── tab-inference-tensorflow-neuron-u20.txt
│       ├── tab-inference-tensorflow-neuron-u22.txt
│       ├── tab-inference-tensorflow-neuronx-al2.txt
│       ├── tab-inference-tensorflow-neuronx-al2023.txt
│       ├── tab-inference-tensorflow-neuronx-u20.txt
│       ├── tab-inference-tensorflow-neuronx-u22.txt
│       ├── tab-inference-torch-neuron-al2.txt
│       ├── tab-inference-torch-neuron-al2023.txt
│       ├── tab-inference-torch-neuron-u20.txt
│       ├── tab-inference-torch-neuron-u22.txt
│       ├── tab-inference-torch-neuron.txt
│       ├── tab-inference-torch-neuronx-al2.txt
│       ├── tab-inference-torch-neuronx-al2023.txt
│       ├── tab-inference-torch-neuronx-u20.txt
│       ├── tab-inference-torch-neuronx-u22.txt
│       └── tab-inference-torch-neuronx-u24.txt
├── index.rst
├── info/
│   └── exclude
├── libraries/
│   ├── index.rst
│   ├── nemo-megatron/
│   │   └── index.rst
│   ├── neuronx-distributed/
│   │   ├── activation_memory_reduction.rst
│   │   ├── activation_memory_reduction_developer_guide.rst
│   │   ├── api-reference-guide-inference.rst
│   │   ├── api-reference-guide-training.rst
│   │   ├── api-reference-guide.rst
│   │   ├── api-reference-guide.txt
│   │   ├── api_guide.rst
│   │   ├── app_notes.rst
│   │   ├── app_notes.txt
│   │   ├── context_parallelism_overview.rst
│   │   ├── developer-guide-inference.rst
│   │   ├── developer-guide-inference.txt
│   │   ├── developer-guide-training.rst
│   │   ├── developer-guide-training.txt
│   │   ├── developer-guide.rst
│   │   ├── developer-guide.txt
│   │   ├── index-inference.rst
│   │   ├── index-training.rst
│   │   ├── lora_finetune_developer_guide.rst
│   │   ├── model_builder_v2_api_reference.rst
│   │   ├── model_optimizer_wrapper_developer_guide.rst
│   │   ├── neuronx-distributed-misc.rst
│   │   ├── neuronx-distributed-misc.txt
│   │   ├── neuronx_distributed_inference_developer_guide.rst
│   │   ├── pipeline_parallelism_overview.rst
│   │   ├── pp_developer_guide.rst
│   │   ├── ptl_developer_guide.rst
│   │   ├── save_load_developer_guide.rst
│   │   ├── setup/
│   │   │   ├── index.rst
│   │   │   └── index.txt
│   │   ├── standard_mixed_precision.rst
│   │   ├── tensor_parallelism_overview.rst
│   │   ├── tp_developer_guide.rst
│   │   └── tutorials/
│   │       ├── finetune_llama3_8b_ptl_lora.rst
│   │       ├── index.rst
│   │       ├── index.txt
│   │       ├── inference.rst
│   │       ├── inference_tutorials.rst
│   │       ├── neuronx_distributed_tutorials.txt
│   │       ├── nxd-source-code/
│   │       │   ├── llama_tp_pp/
│   │       │   │   ├── llama_2_13b.sh
│   │       │   │   ├── llama_2_70b.sh
│   │       │   │   ├── llama_31_70b.sh
│   │       │   │   ├── llama_3_70b.sh
│   │       │   │   └── llama_tp_pp_setup.sh
│   │       │   └── llama_tp_zero1/
│   │       │       ├── llama_2_7b.sh
│   │       │       ├── llama_31_8b.sh
│   │       │       ├── llama_3_8b.sh
│   │       │       └── llama_tp_zero1_setup.sh
│   │       ├── nxd_inference_tutorials.txt
│   │       ├── nxd_training_tutorials.txt
│   │       ├── training.rst
│   │       ├── training_llama_tp_pp.rst
│   │       ├── training_llama_tp_zero1.rst
│   │       └── training_tutorials.rst
│   ├── nxd-inference/
│   │   ├── _templates/
│   │   │   ├── model_card.jinja.rst
│   │   │   └── model_card_qwen3.jinja.rst
│   │   ├── api-guides/
│   │   │   ├── api-guide.rst
│   │   │   ├── api-guide.txt
│   │   │   └── index.rst
│   │   ├── app-notes/
│   │   │   ├── app_notes.txt
│   │   │   ├── index.rst
│   │   │   └── parallelism.rst
│   │   ├── developer_guides/
│   │   │   ├── accuracy-eval-with-datasets.rst
│   │   │   ├── custom-quantization.rst
│   │   │   ├── disaggregated-inference.rst
│   │   │   ├── feature-guide.rst
│   │   │   ├── how-to-use-fpem.rst
│   │   │   ├── index.rst
│   │   │   ├── llm-inference-benchmarking-guide.rst
│   │   │   ├── migrate-from-tnx-to-nxdi.rst
│   │   │   ├── model-reference.rst
│   │   │   ├── moe-arch-deep-dive.rst
│   │   │   ├── nxd-examples-migration-guide.rst
│   │   │   ├── onboarding-models.rst
│   │   │   ├── performance-cli-params.rst
│   │   │   ├── vllm-user-guide-v1.rst
│   │   │   ├── vllm-user-guide.rst
│   │   │   ├── weights-sharding-guide.rst
│   │   │   └── writing-tests.rst
│   │   ├── examples/
│   │   │   └── vllm_client.py
│   │   ├── index.rst
│   │   ├── misc/
│   │   │   ├── index.rst
│   │   │   ├── misc.txt
│   │   │   └── nxdi-troubleshooting.rst
│   │   ├── models/
│   │   │   ├── index.rst
│   │   │   ├── llama3/
│   │   │   │   ├── data/
│   │   │   │   │   └── card_llama33_70b.yml
│   │   │   │   └── llama_33_70b.rst
│   │   │   ├── models.txt
│   │   │   └── qwen3/
│   │   │       ├── data/
│   │   │       │   └── card_qwen3_moe_235b.yml
│   │   │       └── qwen3_moe_235b.rst
│   │   ├── neuron-inference-overview.rst
│   │   ├── nxdi-setup.rst
│   │   ├── overview-index.rst
│   │   ├── setup.txt
│   │   ├── tutorials/
│   │   │   ├── disaggregated-inference-tutorial-1p1d.rst
│   │   │   ├── disaggregated-inference-tutorial.rst
│   │   │   ├── flux-inference-tutorial.ipynb
│   │   │   ├── flux-inpainting-inference-tutorial.ipynb
│   │   │   ├── generating-results-with-performance-cli.ipynb
│   │   │   ├── index.rst
│   │   │   ├── llama4-tutorial-v0.ipynb
│   │   │   ├── llama4-tutorial.ipynb
│   │   │   ├── llama405b_perf_comparison.csv
│   │   │   ├── llama70b_apc_perf_comparison.csv
│   │   │   ├── llama70b_perf_comparison.csv
│   │   │   ├── modules_to_not_convert.json
│   │   │   ├── pixtral-tutorial.ipynb
│   │   │   ├── qwen2-vl-tutorial.ipynb
│   │   │   ├── qwen3-moe-tutorial.ipynb
│   │   │   ├── qwen3-vl-tutorial.ipynb
│   │   │   ├── sd-inference-tutorial.rst
│   │   │   ├── trn1-llama3.1-70b-instruct-accuracy-eval-tutorial.ipynb
│   │   │   ├── trn2-llama3.1-405b-speculative-tutorial.rst
│   │   │   ├── trn2-llama3.1-405b-tutorial.rst
│   │   │   ├── trn2-llama3.1-8b-multi-lora-tutorial.ipynb
│   │   │   ├── trn2-llama3.3-70b-apc-tutorial.ipynb
│   │   │   ├── trn2-llama3.3-70b-dp-tutorial.ipynb
│   │   │   ├── trn2-llama3.3-70b-fp8.rst
│   │   │   ├── trn2-llama3.3-70b-tutorial.rst
│   │   │   └── trn3-gpt-oss-120b-tutorial.rst
│   │   └── vllm/
│   │       ├── index.rst
│   │       ├── quickstart-vllm-offline-serving.rst
│   │       └── quickstart-vllm-online-serving.rst
│   ├── nxd-training/
│   │   ├── api-guide.txt
│   │   ├── api-reference-guide.rst
│   │   ├── app_notes/
│   │   │   ├── nxd-training-amr-appnote.rst
│   │   │   ├── nxd-training-cp-appnote.rst
│   │   │   ├── nxd-training-pp-appnote.rst
│   │   │   └── nxd-training-tp-appnote.rst
│   │   ├── app_notes.rst
│   │   ├── app_notes.txt
│   │   ├── developer-guide.rst
│   │   ├── developer_guides/
│   │   │   ├── cpu_mode_developer_guide.rst
│   │   │   ├── dev-guide.txt
│   │   │   ├── index.rst
│   │   │   ├── migration_nemo_nxdt.rst
│   │   │   ├── migration_nnm_nxdt.rst
│   │   │   ├── nemo_nxdt_mapping.csv
│   │   │   ├── new_dataloader_guide.rst
│   │   │   ├── new_model_guide.rst
│   │   │   ├── nnm_nxdt_mapping.csv
│   │   │   └── optimizer_lr_scheduler_flow.rst
│   │   ├── general/
│   │   │   ├── config_overview.rst
│   │   │   ├── features.rst
│   │   │   ├── installation_guide.rst
│   │   │   ├── known-issues.txt
│   │   │   └── known_issues.rst
│   │   ├── index.rst
│   │   ├── misc.rst
│   │   ├── misc.txt
│   │   ├── overview.rst
│   │   ├── overview.txt
│   │   ├── setup.txt
│   │   └── tutorials/
│   │       ├── checkpoint_conversion.rst
│   │       ├── hf_llama3_70B_pretraining.rst
│   │       ├── hf_llama3_8B_DPO_ORPO.rst
│   │       ├── hf_llama3_8B_SFT.rst
│   │       ├── hf_llama3_8B_SFT_LORA.rst
│   │       ├── hf_llama3_8B_pretraining.rst
│   │       ├── index.rst
│   │       ├── megatron_gpt_pretraining.rst
│   │       └── tutorials.txt
│   └── transformers-neuronx/
│       └── index.rst
├── llms.txt
├── neuron-customops/
│   ├── api-reference-guide/
│   │   ├── api-reference-guide.rst
│   │   └── custom-ops-ref-guide.rst
│   ├── customops-intro.txt
│   ├── index.rst
│   ├── misc-customops.rst
│   ├── programming-guide/
│   │   ├── custom-c++-operators-devguide.rst
│   │   └── programming-guide.rst
│   └── tutorials/
│       ├── customop-mlp-perf-opt.rst
│       ├── customop-mlp-training.rst
│       ├── tutorial_source_code/
│       │   ├── custom_c_mlp_training/
│       │   │   └── custom_c_mlp_training_code.sh
│       │   └── custom_c_perf_optimization/
│       │       └── custom_c_perf_optimization_code.sh
│       └── tutorials.rst
├── neuron-runtime/
│   ├── about/
│   │   ├── collectives.rst
│   │   ├── core-dump.rst
│   │   └── index.rst
│   ├── api/
│   │   ├── debug-stream-api.rst
│   │   ├── index.rst
│   │   ├── ndebug_stream.rst
│   │   ├── ndl.rst
│   │   ├── nec.rst
│   │   ├── neuron_driver_shared.rst
│   │   ├── neuron_driver_shared_tensor_batch_op.rst
│   │   ├── neuron_ds.rst
│   │   ├── nrt-async-api-best-practices.rst
│   │   ├── nrt-async-api-examples.rst
│   │   ├── nrt-async-api-overview.rst
│   │   ├── nrt.rst
│   │   ├── nrt_async.rst
│   │   ├── nrt_async_sendrecv.rst
│   │   ├── nrt_experimental.rst
│   │   ├── nrt_profile.rst
│   │   ├── nrt_status.rst
│   │   ├── nrt_sys_trace.rst
│   │   └── nrt_version.rst
│   ├── configuration-guide.rst
│   ├── explore/
│   │   ├── compute-comm-overlap.rst
│   │   ├── core-dump-deep-dive.rst
│   │   ├── device-memory.rst
│   │   ├── direct-hbm-tensor-alloc.rst
│   │   ├── index.rst
│   │   ├── internode-collective-comm.rst
│   │   ├── intranode-collective-comm.rst
│   │   ├── runtime-performance-tips.rst
│   │   └── work-with-neff-files.rst
│   ├── faq.rst
│   ├── index.rst
│   ├── nrt-configurable-parameters.rst
│   ├── nrt-developer-guide.rst
│   ├── nrt-troubleshoot.rst
│   └── rn.rst
├── nki/
│   ├── _ext/
│   │   └── nki_directives.py
│   ├── _templates/
│   │   ├── nki-custom-class-attr-only-template.rst
│   │   └── nki-custom-class-template.rst
│   ├── api/
│   │   ├── index.rst
│   │   ├── nki/
│   │   │   ├── __init__.py
│   │   │   ├── collectives/
│   │   │   │   └── __init__.py
│   │   │   ├── isa/
│   │   │   │   └── __init__.py
│   │   │   └── language/
│   │   │       └── __init__.py
│   │   ├── nki.api.shared.rst
│   │   ├── nki.collectives.rst
│   │   ├── nki.isa.rst
│   │   ├── nki.isa.rst.bak
│   │   ├── nki.language.rst
│   │   ├── nki.language.tile_size.rst
│   │   ├── nki.rst
│   │   └── nki.simulate.rst
│   ├── deep-dives/
│   │   ├── index.rst
│   │   ├── mxfp-matmul.rst
│   │   ├── nki-aps.rst
│   │   ├── nki-compiler.rst
│   │   ├── nki-dge.rst
│   │   ├── nki-dma-bandwidth-guide.rst
│   │   ├── nki-dynamic-loops.rst
│   │   ├── nki_perf_guide.rst
│   │   └── src/
│   │       └── mxfp-matmul/
│   │           ├── mx_cpu_utils.py
│   │           ├── mx_kernel_utils.py
│   │           ├── mx_kernels.py
│   │           └── mx_toplevel.py
│   ├── examples/
│   │   ├── average_pool2d/
│   │   │   ├── average_pool2d_jax.py
│   │   │   ├── average_pool2d_nki_kernels.py
│   │   │   └── average_pool2d_torch.py
│   │   ├── fused_mamba/
│   │   │   ├── mamba_nki_kernels.py
│   │   │   └── mamba_torch.py
│   │   ├── getting_started_baremetal.py
│   │   ├── getting_started_jax.py
│   │   ├── getting_started_torch.py
│   │   ├── index-case-1.py
│   │   ├── index-case-3.py
│   │   ├── layout-dynamic-loop.py
│   │   ├── layout-loop.py
│   │   ├── layout-pass.py
│   │   ├── layout-violation.py
│   │   ├── matrix_multiplication/
│   │   │   ├── matrix_multiplication_nki_kernels.py
│   │   │   └── matrix_multiplication_torch.py
│   │   ├── simulate/
│   │   │   └── nki_simulate_example.py
│   │   ├── tensor_addition/
│   │   │   └── tensor_addition_nki_kernels.py
│   │   └── transpose2d/
│   │       ├── transpose2d_jax.py
│   │       ├── transpose2d_nki_kernels.py
│   │       └── transpose2d_torch.py
│   ├── get-started/
│   │   ├── about/
│   │   │   ├── data-representation-overview.rst
│   │   │   ├── index.rst
│   │   │   ├── indexing-overview.rst
│   │   │   ├── lnc.rst
│   │   │   ├── memory-hierarchy-overview.rst
│   │   │   ├── nki-dma-overview.rst
│   │   │   └── tiling-overview.rst
│   │   ├── index.rst
│   │   ├── nki-language-guide.rst
│   │   ├── quickstart-implement-run-kernel.rst
│   │   └── setup-env.rst
│   ├── guides/
│   │   ├── architecture/
│   │   │   ├── index.rst
│   │   │   ├── trainium2_arch.rst
│   │   │   ├── trainium3_arch.rst
│   │   │   └── trainium_inferentia2_arch.rst
│   │   ├── framework_custom_op.rst
│   │   ├── how-to-scheduling-apis.rst
│   │   ├── index.rst
│   │   ├── nki_simulator.rst
│   │   ├── tutorials/
│   │   │   ├── average_pool2d.rst
│   │   │   ├── fused_mamba.rst
│   │   │   ├── index.rst
│   │   │   ├── kernel-optimization.rst
│   │   │   ├── matrix_multiplication.rst
│   │   │   └── transpose2d.rst
│   │   └── use-neuron-profile.rst
│   ├── index.rst
│   ├── library/
│   │   ├── about/
│   │   │   └── index.rst
│   │   ├── api/
│   │   │   ├── attention-block-tkg.rst
│   │   │   ├── attention-cte.rst
│   │   │   ├── attention-tkg.rst
│   │   │   ├── blockwise-mm-backward.rst
│   │   │   ├── conv1d.rst
│   │   │   ├── cross-entropy.rst
│   │   │   ├── cumsum.rst
│   │   │   ├── depthwise-conv1d.rst
│   │   │   ├── dynamic-elementwise-add.rst
│   │   │   ├── fg-allgather.rst
│   │   │   ├── fgcc.rst
│   │   │   ├── find-nonzero-indices.rst
│   │   │   ├── index.rst
│   │   │   ├── mlp.rst
│   │   │   ├── moe-cte.rst
│   │   │   ├── moe-tkg.rst
│   │   │   ├── output-projection-cte.rst
│   │   │   ├── output-projection-tkg.rst
│   │   │   ├── qkv.rst
│   │   │   ├── rmsnorm-quant.rst
│   │   │   ├── rope.rst
│   │   │   ├── router-topk.rst
│   │   │   ├── sb2sb-allgather.rst
│   │   │   ├── topk-reduce.rst
│   │   │   └── transformer-tkg.rst
│   │   ├── index.rst
│   │   ├── kernel-utils/
│   │   │   ├── allocator.rst
│   │   │   ├── index.rst
│   │   │   └── tensor-view.rst
│   │   └── specs/
│   │       ├── design-rmsnorm-quant.rst
│   │       └── index.rst
│   ├── migration/
│   │   ├── index.rst
│   │   ├── nki-0-3-0-update-guide.rst
│   │   ├── nki-beta2-migration-guide.rst
│   │   └── nki_block_dimension_migration_guide.rst
│   ├── nki_faq.rst
│   ├── scripts/
│   │   ├── markdown2rst.py
│   │   └── requirements.txt
│   └── test/
│       ├── test_nki_isa_activation.py
│       ├── test_nki_isa_affine_select.py
│       ├── test_nki_isa_bn_stats.py
│       ├── test_nki_isa_copypredicated.py
│       ├── test_nki_isa_dma_copy.py
│       ├── test_nki_isa_dma_transpose.py
│       ├── test_nki_isa_dropout.py
│       ├── test_nki_isa_iota.py
│       ├── test_nki_isa_local_gather.py
│       ├── test_nki_isa_max8.py
│       ├── test_nki_isa_memset.py
│       ├── test_nki_isa_nc_find_index8.py
│       ├── test_nki_isa_nc_match_replace8.py
│       ├── test_nki_isa_nc_matmul.py
│       ├── test_nki_isa_nc_stream_shuffle.py
│       ├── test_nki_isa_nc_transpose.py
│       ├── test_nki_isa_partition_reduce.py
│       ├── test_nki_isa_range_select.py
│       ├── test_nki_isa_reciprocal.py
│       ├── test_nki_isa_reduce.py
│       ├── test_nki_isa_select_reduce.py
│       ├── test_nki_isa_sequence_bounds.py
│       ├── test_nki_isa_tensor_copy.py
│       ├── test_nki_isa_tensor_scalar.py
│       ├── test_nki_isa_tensor_scalar_cumulative.py
│       ├── test_nki_isa_tensor_tensor.py
│       ├── test_nki_isa_tensor_tensor_scan.py
│       ├── test_nki_mask.py
│       ├── test_nki_memory_semantics.py
│       ├── test_nki_nl_add.py
│       ├── test_nki_nl_atomic_rmw.py
│       ├── test_nki_nl_broadcast.py
│       ├── test_nki_nl_dslice.py
│       ├── test_nki_nl_gather_flattened.py
│       ├── test_nki_nl_load_store.py
│       ├── test_nki_nl_load_store_indirect.py
│       ├── test_nki_nl_load_transpose2d.py
│       ├── test_nki_nl_mgrid.py
│       ├── test_nki_simulate_kernel.py
│       ├── test_nki_spmd_grid.py
│       ├── test_psum_modulo_alloc.py
│       └── test_sbuf_modulo_alloc.py
├── release-notes/
│   ├── 2.29.0.rst
│   ├── archive/
│   │   ├── customcxxps/
│   │   │   ├── gpsimd-customop-lib.rst
│   │   │   └── gpsimd-tools.rst
│   │   ├── index.rst
│   │   ├── libneuronxla.rst
│   │   ├── mxnet-neuron.rst
│   │   ├── nemo/
│   │   │   ├── index.rst
│   │   │   └── neuronx-nemo.rst
│   │   ├── neuron-cc/
│   │   │   ├── neuron-cc-ops/
│   │   │   │   ├── index.rst
│   │   │   │   ├── neuron-cc-ops-mxnet.rst
│   │   │   │   ├── neuron-cc-ops-pytorch.rst
│   │   │   │   ├── neuron-cc-ops-tensorflow.rst
│   │   │   │   └── neuron-cc-ops-xla.rst
│   │   │   └── neuron-cc.rst
│   │   ├── neuron1/
│   │   │   ├── _legacy-labels.rst
│   │   │   ├── neuronrelease/
│   │   │   │   └── previous-content.rst
│   │   │   └── prev/
│   │   │       ├── content.rst
│   │   │       └── rn.rst
│   │   ├── tensorboard-neuron.rst
│   │   ├── tensorflow/
│   │   │   ├── tensorflow-modelserver-neuron/
│   │   │   │   ├── tensorflow-modelserver-neuron-v2.rst
│   │   │   │   ├── tensorflow-modelserver-neuron.rst
│   │   │   │   └── tensorflow-modelserver-neuronx.rst
│   │   │   ├── tensorflow-neuron/
│   │   │   │   ├── tensorflow-neuron-v2.rst
│   │   │   │   └── tensorflow-neuron.rst
│   │   │   └── tensorflow-neuronx/
│   │   │       └── tensorflow-neuronx.rst
│   │   └── torch-neuron.rst
│   ├── components/
│   │   ├── compiler.rst
│   │   ├── containers.rst
│   │   ├── dev-tools.rst
│   │   ├── dlamis.rst
│   │   ├── index.rst
│   │   ├── jax.rst
│   │   ├── nki-lib.rst
│   │   ├── nki.rst
│   │   ├── nxd-core.rst
│   │   ├── nxd-inference.rst
│   │   ├── nxd-training.rst
│   │   ├── pytorch.rst
│   │   └── runtime.rst
│   ├── documentation/
│   │   └── neuron-documentation.rst
│   ├── index.rst
│   ├── prev/
│   │   ├── 2.25.0/
│   │   │   ├── compiler.rst
│   │   │   ├── containers.rst
│   │   │   ├── dlami.rst
│   │   │   ├── docs-and-samples.rst
│   │   │   ├── index.rst
│   │   │   ├── nx-jax.rst
│   │   │   ├── nx-pytorch.rst
│   │   │   ├── nxd-core.rst
│   │   │   ├── nxd-inference.rst
│   │   │   ├── nxd-training.rst
│   │   │   ├── runtime.rst
│   │   │   └── tools.rst
│   │   ├── 2.26.0/
│   │   │   ├── containers.rst
│   │   │   ├── dlami.rst
│   │   │   ├── index.rst
│   │   │   ├── nki.rst
│   │   │   ├── nx-jax.rst
│   │   │   ├── nx-pytorch.rst
│   │   │   ├── nxd-core.rst
│   │   │   ├── nxd-inference.rst
│   │   │   ├── runtime.rst
│   │   │   └── tools.rst
│   │   ├── 2.26.1.rst
│   │   ├── 2.27.0/
│   │   │   ├── compiler.rst
│   │   │   ├── containers.rst
│   │   │   ├── dlami.rst
│   │   │   ├── index.rst
│   │   │   ├── nki-lib.rst
│   │   │   ├── nki.rst
│   │   │   ├── nx-pytorch.rst
│   │   │   ├── nxd-inference.rst
│   │   │   ├── runtime.rst
│   │   │   └── tools.rst
│   │   ├── 2.27.1.rst
│   │   ├── 2.28.0.rst
│   │   ├── 2.28.1.rst
│   │   ├── content.rst
│   │   └── rn.rst
│   └── releasecontent.rst
├── requirements-python310.txt
├── requirements-python38.txt
├── requirements.txt
├── setup/
│   ├── index.rst
│   ├── index.txt-back
│   ├── install-templates/
│   │   ├── al2-python.rst
│   │   ├── inf1/
│   │   │   ├── compile_mode.rst
│   │   │   ├── deploy_mode.rst
│   │   │   ├── develop_mode.rst
│   │   │   ├── dlami-enable-neuron-mxnet.rst
│   │   │   ├── dlami-enable-neuron-pytorch.rst
│   │   │   ├── launch-inf1-ami.rst
│   │   │   ├── launch-inf1-dlami-aws-cli.rst
│   │   │   ├── launch-inf1-dlami.rst
│   │   │   ├── neuron-pip-install.rst
│   │   │   ├── neuron-pip-setup.rst
│   │   │   ├── note-setup-cntr.rst
│   │   │   ├── note-setup-general.rst
│   │   │   ├── note-setup-libnrt-warning.rst
│   │   │   └── tensorboard-plugin-neuron-pip-install.rst
│   │   ├── inf2/
│   │   │   ├── dlami-enable-neuron-pytorch.rst
│   │   │   ├── launch-inf2-dlami.rst
│   │   │   └── note-setup-libnrt-warning.rst
│   │   ├── launch-instance.txt
│   │   ├── launch-trn1-dlami.rst
│   │   ├── trn1/
│   │   │   └── dlami-notes.rst
│   │   └── trn1-ga-warning.txt
│   ├── jax/
│   │   ├── dlami.rst
│   │   ├── dlc.rst
│   │   ├── index.rst
│   │   └── manual.rst
│   ├── jax-neuronx.rst
│   ├── legacy-inf1/
│   │   ├── index.rst
│   │   └── pytorch.rst
│   ├── multiframework-dlami.rst
│   ├── mxnet-neuron.rst
│   ├── notebook/
│   │   ├── running-jupyter-notebook-as-script.rst
│   │   └── setup-jupyter-notebook-steps-troubleshooting.rst
│   ├── pytorch/
│   │   ├── dlami.rst
│   │   ├── dlc.rst
│   │   ├── index.rst
│   │   ├── manual.rst
│   │   ├── update-dlami.rst
│   │   ├── update-dlc.rst
│   │   └── update-manual.rst
│   ├── setup-rocky-linux-9.rst
│   ├── setup-troubleshooting.rst
│   ├── torch-neuron-ubuntu20.rst
│   ├── torch-neuron.rst
│   ├── torch-neuronx.rst
│   └── troubleshooting.rst
├── src/
│   ├── benchmark/
│   │   ├── helper_scripts/
│   │   │   ├── llmperf_dp.patch
│   │   │   ├── llmperf_reasoning.patch
│   │   │   └── neuron_perf.patch
│   │   └── tensorflow/
│   │       ├── distilbert-base-uncased-finetuned-sst-2-english_benchmark.py
│   │       └── distilbert-base-uncased-finetuned-sst-2-english_compile.py
│   ├── examples/
│   │   ├── mxnet/
│   │   │   ├── README.md
│   │   │   ├── data_parallel/
│   │   │   │   ├── benchmark_utils.py
│   │   │   │   ├── data_parallel_tutorial.ipynb
│   │   │   │   └── parallel.py
│   │   │   ├── mxnet-gluon-tutorial.ipynb
│   │   │   ├── resnet50/
│   │   │   │   └── resnet50.ipynb
│   │   │   └── resnet50_neuroncore_groups.ipynb
│   │   ├── neuron-monitor/
│   │   │   └── neuron-monitor-grafana.json
│   │   ├── pytorch/
│   │   │   ├── bert_tutorial/
│   │   │   │   ├── README.md
│   │   │   │   ├── THIRD
│   │   │   │   ├── THIRD PARTY LICENSE.txt
│   │   │   │   ├── bert_benchmark_utils.py
│   │   │   │   ├── glue_mrpc_dev.tsv
│   │   │   │   ├── parallel.py
│   │   │   │   ├── tutorial_pretrained_bert.ipynb
│   │   │   │   └── tutorial_pretrained_bert_shared_weights.ipynb
│   │   │   ├── byoc_sm_bert_tutorial/
│   │   │   │   ├── code/
│   │   │   │   │   └── inference.py
│   │   │   │   ├── container/
│   │   │   │   │   └── Dockerfile
│   │   │   │   └── sagemaker_container_neuron.ipynb
│   │   │   ├── libtorch_demo/
│   │   │   │   ├── bert_neuronx/
│   │   │   │   │   ├── compile.py
│   │   │   │   │   └── detect_instance.py
│   │   │   │   ├── clean.sh
│   │   │   │   ├── example_app/
│   │   │   │   │   ├── README.txt
│   │   │   │   │   ├── build.sh
│   │   │   │   │   ├── core_count.hpp
│   │   │   │   │   ├── example_app.cpp
│   │   │   │   │   ├── utils.cpp
│   │   │   │   │   └── utils.hpp
│   │   │   │   ├── neuron.patch
│   │   │   │   ├── run_tests.sh
│   │   │   │   ├── setup.sh
│   │   │   │   ├── tokenizers_binding/
│   │   │   │   │   ├── build.sh
│   │   │   │   │   ├── remote_rust_tokenizer.h
│   │   │   │   │   ├── run.sh
│   │   │   │   │   ├── run_python.sh
│   │   │   │   │   ├── tokenizer_test
│   │   │   │   │   ├── tokenizer_test.cpp
│   │   │   │   │   └── tokenizer_test.py
│   │   │   │   └── trace_bert_neuron.py
│   │   │   ├── mnist_mlp/
│   │   │   │   ├── train_monitor.py
│   │   │   │   └── train_tb.py
│   │   │   ├── neuronx_distributed/
│   │   │   │   └── t5-inference/
│   │   │   │       ├── t5-inference-tutorial.ipynb
│   │   │   │       ├── t5_model_layers.py
│   │   │   │       ├── t5_models.py
│   │   │   │       └── wrapper.py
│   │   │   ├── pipeline_tutorial/
│   │   │   │   └── neuroncore_pipeline_pytorch.ipynb
│   │   │   ├── resnet50.ipynb
│   │   │   ├── resnet50_partition.ipynb
│   │   │   ├── torch-neuronx/
│   │   │   │   ├── bert-base-cased-finetuned-mrpc-inference-on-trn1-tutorial.ipynb
│   │   │   │   ├── resnet50-inference-on-trn1-tutorial.ipynb
│   │   │   │   └── t5-inference-tutorial.ipynb
│   │   │   ├── torchserve/
│   │   │   │   ├── benchmark_bert.py
│   │   │   │   ├── config.json
│   │   │   │   ├── handler_bert.py
│   │   │   │   ├── handler_bert_neuronx.py
│   │   │   │   ├── infer_bert.py
│   │   │   │   ├── torchserve.config
│   │   │   │   ├── trace_bert_neuron.py
│   │   │   │   └── trace_bert_neuronx.py
│   │   │   ├── transformers-marianmt.ipynb
│   │   │   └── yolo_v4.ipynb
│   │   └── tensorflow/
│   │       ├── bert_demo/
│   │       │   ├── LICENSE
│   │       │   ├── README.md
│   │       │   ├── bert_client.py
│   │       │   ├── bert_model.py
│   │       │   ├── bert_model_server.py
│   │       │   ├── bert_no_model.py
│   │       │   ├── bert_server.py
│   │       │   ├── download_mrpc_data.py
│   │       │   ├── glue_mrpc_dev.tsv
│   │       │   ├── latency_printer.py
│   │       │   ├── mrpc.proto
│   │       │   ├── mrpc_feature.py
│   │       │   ├── mrpc_pb2.py
│   │       │   ├── mrpc_pb2_grpc.py
│   │       │   ├── protoc.sh
│   │       │   ├── setup.py
│   │       │   ├── tokenization.py
│   │       │   ├── tune_save.sh
│   │       │   └── uncased_L-24_H-1024_A-16.vocab.txt
│   │       ├── huggingface_bert/
│   │       │   └── huggingface_bert.ipynb
│   │       ├── k8s_bert_demo/
│   │       │   ├── Dockerfile.tfserving_example
│   │       │   ├── README.md
│   │       │   ├── bert_client.py
│   │       │   └── bert_service.yml
│   │       ├── keras_resnet50/
│   │       │   ├── LICENSE
│   │       │   ├── README.md
│   │       │   ├── fp32tofp16.py
│   │       │   ├── full_sweep
│   │       │   ├── gen_resnet50_keras.py
│   │       │   ├── infer_resnet50_keras.py
│   │       │   ├── infer_resnet50_keras_loadtest.py
│   │       │   ├── keras_resnet50.ipynb
│   │       │   ├── optimize_for_inference.py
│   │       │   ├── pb2sm_compile.py
│   │       │   └── run_all
│   │       ├── openpose_demo/
│   │       │   └── openpose.ipynb
│   │       ├── ssd300_demo/
│   │       │   ├── README.md
│   │       │   ├── ssd300_detection.py
│   │       │   ├── ssd300_evaluation.py
│   │       │   ├── ssd300_evaluation_client.py
│   │       │   └── ssd300_model.py
│   │       ├── tensorflow-neuronx/
│   │       │   └── tfneuronx-roberta-base-tutorial.ipynb
│   │       ├── tensorflow_resnet50/
│   │       │   └── resnet50.ipynb
│   │       ├── tensorflow_serving_tutorial.rst
│   │       ├── yolo_v3_demo/
│   │       │   ├── yolo_v3.ipynb
│   │       │   └── yolo_v3_coco_saved_model.py
│   │       └── yolo_v4_demo/
│   │           ├── README.md
│   │           ├── evaluate.ipynb
│   │           └── yolo_v4_coco_saved_model.py
│   ├── helperscripts/
│   │   ├── installationScripts/
│   │   │   └── python_instructions.txt
│   │   ├── n2-helper.py
│   │   ├── n2-manifest.json
│   │   ├── neuron-releases-manifest.json
│   │   ├── neuron-setup-example.py
│   │   ├── neuronsetuphelper.py
│   │   └── release-manifest-def.py
│   ├── k8/
│   │   ├── bert_service.yml
│   │   ├── k8s-neuron-device-plugin-rbac.yml
│   │   ├── k8s-neuron-device-plugin.yml
│   │   ├── k8s-neuron-monitor-daemonset.yml
│   │   ├── k8s-neuron-scheduler-configmap.yml
│   │   ├── k8s-neuron-scheduler-eks.yml
│   │   ├── k8s-neuron-scheduler.yml
│   │   ├── k8s-ultraserver-init-script.sh
│   │   ├── my-scheduler.yml
│   │   └── neuron-problem-detector/
│   │       ├── k8s-neuron-problem-detector-and-recovery-config.yml
│   │       ├── k8s-neuron-problem-detector-and-recovery-rbac.yml
│   │       └── k8s-neuron-problem-detector-and-recovery.yml
│   ├── libnrt/
│   │   ├── README.md
│   │   └── include/
│   │       ├── ndl/
│   │       │   ├── ndl.h
│   │       │   ├── neuron_driver_shared.h
│   │       │   └── neuron_driver_shared_tensor_batch_op.h
│   │       └── nrt/
│   │           ├── ndebug_stream.h
│   │           ├── nds/
│   │           │   └── neuron_ds.h
│   │           ├── nec.h
│   │           ├── nrt.h
│   │           ├── nrt_async.h
│   │           ├── nrt_async_sendrecv.h
│   │           ├── nrt_experimental.h
│   │           ├── nrt_profile.h
│   │           ├── nrt_status.h
│   │           ├── nrt_sys_trace.h
│   │           └── nrt_version.h
│   ├── neuron-gatherinfo/
│   │   ├── LICENSE
│   │   ├── clear_params_tfpb.py
│   │   ├── mx_neuron_check_model.py
│   │   ├── neuron-gatherinfo.py
│   │   └── tf_neuron_check_model.py
│   └── neuronperf/
│       ├── LICENSE
│       ├── README.md
│       ├── build.sh
│       ├── conf.py
│       ├── model_neuron_b1.csv
│       ├── pyproject.toml
│       ├── src/
│       │   └── neuronperf/
│       │       ├── __init__.py
│       │       ├── __version__.py
│       │       ├── benchmarking.py
│       │       ├── compile_constants.py
│       │       ├── cpu/
│       │       │   ├── __init__.py
│       │       │   └── cpu.py
│       │       ├── logging.py
│       │       ├── model_index.py
│       │       ├── mxnet/
│       │       │   ├── __init__.py
│       │       │   └── mxnet.py
│       │       ├── py.typed
│       │       ├── reporting.py
│       │       ├── scripts/
│       │       │   ├── __init__.py
│       │       │   └── run_benchmark_file.py
│       │       ├── tensorflow/
│       │       │   ├── __init__.py
│       │       │   └── tensorflow.py
│       │       ├── timing.py
│       │       └── torch/
│       │           ├── __init__.py
│       │           └── torch.py
│       └── test/
│           └── test_neuronperf.py
├── static/
│   ├── google673a8c4fbaa024d8.html
│   ├── robots.txt
│   └── sitemap1.xml
└── tools/
    ├── index.rst
    ├── neuron-explorer/
    │   ├── get-started.rst
    │   ├── how-to-link-view-source-code.rst
    │   ├── how-to-profile-workload.rst
    │   ├── index.rst
    │   ├── migration-faq.rst
    │   ├── overview-ai-recommendations.rst
    │   ├── overview-database-viewer.rst
    │   ├── overview-device-profiles.rst
    │   ├── overview-hierarchy-view.rst
    │   ├── overview-memory-viewer.rst
    │   ├── overview-summary-page.rst
    │   ├── overview-system-profiles.rst
    │   ├── overview-tensor-viewer.rst
    │   └── view-perfetto.rst
    ├── neuron-sys-tools/
    │   ├── index.rst
    │   ├── nccom-test.rst
    │   ├── neuron-ls.rst
    │   ├── neuron-monitor-user-guide.rst
    │   ├── neuron-sysfs-user-guide.rst
    │   └── neuron-top-user-guide.rst
    ├── profiler/
    │   ├── neuron-profile-user-guide.rst
    │   └── neuron-profiler-2-0-beta-user-guide.rst
    ├── tensorboard/
    │   ├── getting-started-tensorboard-neuronx-plugin.rst
    │   └── index.rst
    ├── third-party-solutions.rst
    └── tutorials/
        ├── index.rst
        ├── performance-profiling-vllm.rst
        ├── torch-neuronx-profiling-with-tb.rst
        ├── tutorial-neuron-monitor-mnist.rst
        └── tutorial-tensorboard-scalars-mnist.rst